{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "nBty5iAb_VyE"
   },
   "source": [
    "# A Simple Example of Properties of IV estimator when Instruments are Weak"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "atRvUXGn_VFW"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy.stats import norm\n",
    "from scipy.stats import gaussian_kde\n",
    "import statsmodels.api as sm\n",
    "from statsmodels.sandbox.regression.gmm import IV2SLS\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "wLevLetuBzrs"
   },
   "source": [
    "Simulation Design"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "7oacBDrQB1AU"
   },
   "outputs": [],
   "source": [
    "np.random.seed(123)\n",
    "\n",
    "n = 100\n",
    "beta = 0.1  # 0.1 weak IV\n",
    "# beta = 1  # 1 strong IV\n",
    "\n",
    "# One realization\n",
    "U = norm.rvs(size=n)\n",
    "Z = norm.rvs(size=n)  # generate instrument\n",
    "D = beta * Z + U      # generate endogenous variable\n",
    "Y = D + U             # the true causal effect is 1\n",
    "\n",
    "# First stage regression\n",
    "Z1 = sm.add_constant(Z)\n",
    "model_first_stage = sm.OLS(D, Z1)\n",
    "results_first_stage = model_first_stage.fit()\n",
    "print(results_first_stage.summary())  # first stage is very weak here when we set beta = .1\n",
    "\n",
    "# IV regression\n",
    "D1 = sm.add_constant(D)\n",
    "model_iv = IV2SLS(Y, D1, Z1)\n",
    "results_iv = model_iv.fit()\n",
    "print(results_iv.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "-Y7yS_mvGyt9"
   },
   "source": [
    "Note that the instrument is weak here (strength of the instrument is controlled by setting $\\beta$) -- the t-stat is smaller than any rule-of-thumb suggested in the literature (e.g. $\\sqrt{10}$) ."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "f4wBKUdTG0O2"
   },
   "source": [
    "# Run 10000 trials to evaluate distribution of the IV estimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "el9qoJqbG258"
   },
   "outputs": [],
   "source": [
    "np.random.seed(123)\n",
    "\n",
    "B = 10000  # trials\n",
    "IVEst = np.zeros(B)\n",
    "\n",
    "for i in range(B):\n",
    "    U = norm.rvs(size=n)\n",
    "    Z = norm.rvs(size=n)  # generate instrument\n",
    "    D = beta * Z + U  # generate endogenous variable\n",
    "    Y = D + U  # the true causal effect is 1\n",
    "\n",
    "    # IV regression\n",
    "    model_iv = IV2SLS(Y, D, Z)\n",
    "    results_iv = model_iv.fit()\n",
    "    IVEst[i] = results_iv.params[0]  # Coefficient of D"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "aXLgc8TkIzYf"
   },
   "source": [
    "# Plot the Actual Distribution against the Normal Approximation (based on Strong Instrument Assumption)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "DDr-glMeI2Fk"
   },
   "outputs": [],
   "source": [
    "# Plotting the density of IVEst\n",
    "plt.figure(figsize=(8, 6))\n",
    "plt.xlim(-5, 5)\n",
    "plt.xlabel(\"IV Estimator - True Effect\")\n",
    "plt.title(\"Actual Distribution vs Gaussian\")\n",
    "\n",
    "# Plotting density estimate of simulated IV coefficients\n",
    "val = np.linspace(-5, 5, 200)\n",
    "kde = gaussian_kde(IVEst - 1, bw_method=.001)  # Need to play with bandwidth depending on problem features.\n",
    "density = kde(np.linspace(-5, 5, 1000))\n",
    "plt.plot(np.linspace(-5, 5, 1000), density, color='blue', label='Actual Distribution')\n",
    "\n",
    "# Plotting Gaussian distribution\n",
    "var = (1 / (beta ** 2)) * (1 / n)  # theoretical variance of IV\n",
    "sd = np.sqrt(var)\n",
    "gaussian = norm.pdf(val, scale=sd)\n",
    "plt.plot(val, gaussian, color='red', linestyle='--', label='Gaussian Distribution')\n",
    "\n",
    "plt.legend()\n",
    "\n",
    "# Calculating rejection frequency\n",
    "rejection_frequency = np.sum(np.abs(IVEst - 1) / sd > 1.96) / B\n",
    "print(\"Rejection Frequency is\", rejection_frequency, \"while we expect it to be around 0.05\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
